1 Introduction

12 organoid lines were screened with about 70 compounds (5 concentrations) of the clinical cancer panel. After 7 days total (4 days of treatment) the organoids we lysed and a ctg assay was performed. The experiment was conducted in 2 replciates.

1.1 Prepare data analysis

1.1.1 Load necessary packages for data analysis

#library(tidyverse)
library(readr)
library(dplyr)
library(tidyr)
library(ggplot2)
library(corrr)
library(pheatmap)
library(gridExtra)
library(EBImage) #cite
library(HTSvis) #cite
library(PharmacoGx) #cite
library(growthcurve) #cite
library(jsonlite)
library(httr)

1.1.2 Access DGIDB database to annotate compounds in library

To systematically link compounds to a molecular target the DGIDB database is used. Library compounds are linked to affected genes. The result table is manually curated and subsequently loaded.

return_dgidb <- function(input, type = "genes"){
url = "http://dgidb.genome.wustl.edu"
path = paste0("/api/v1/interactions.json",
              "?", type,"=",input %>% str_replace_all(., " ", ""))

response <- GET(url = url, path = path)

response$content %>%
  rawToChar() %>% 
  fromJSON() -> temp

do.call(what = "rbind",
        args = lapply(temp, as.data.frame)) -> temp

    if(nrow(temp) > 0 & !("suggestions" %in% colnames(temp))){
    as.data.frame(temp$interactions) %>%
      as_tibble() %>%
        select(contains("Name"),  source, interactionType, contains("Id")) %>%
        mutate(input = input,
               alt_input = NA,
             input_type = type) %>%
        return()
    } else if(("suggestions" %in% colnames(temp)) & (temp$suggestions %>% unlist %>% is.null() != TRUE)){
      input.alt <- temp$suggestions %>% unlist %>% .[1]
      cat(paste0(c("corrected ", input, "to", input.alt, "\n")))
      path.alt = paste0("/api/v1/interactions.json",
                    "?", type,"=",input.alt %>% str_replace_all(., " ", ""))
      
      GET(url = url, path = path.alt) %>%
        .$content%>%
        rawToChar() %>% 
        fromJSON() -> temp.alt
      
      do.call(what = "rbind",
              args = lapply(temp.alt, as.data.frame)) -> temp.alt
      
        if(nrow(temp.alt) > 0 & !("suggestions" %in% colnames(temp.alt))){
      as.data.frame(temp.alt$interactions) %>%
      as_tibble() %>%
      select(contains("Name"),  source, interactionType, contains("Id")) %>%
      mutate(input = input,
             alt_input = input.alt,
             input_type = type) %>%
        return()
        }
    } else 
      cat(paste0(c("could not find ", input, "\n")))
}

drug_targets.list <- lapply(auc_ccp$drug %>% unique, return_dgidb, type = "drugs")
  
drug_targets <- do.call(what = "rbind",
        args = lapply(drug_targets.list, as.data.frame)) %>%
  #ugly way to fix the problem of changing names (geneName or drugName depending on the input type)
  group_by_(colnames(.)[1], "source", "input") %>% 
          summarize(count = n()) %>%
      group_by_(colnames(.)[1], "input") %>%
      summarize(count = n()) %>%
  filter(count > 0) %>%
  group_by(input) %>%
  summarise(target = geneName[count %>% which.max()],
            count = max(count)) %>%
  arrange(target)

1.1.3 Format and save the ctg datasets

#if(is.na() == FALSE){cut(x, c(0, 18.5, 24.9, 29.9, 34.9, 39.9, Inf), labels = FALSE)} else NA
#run rsync -ravP rindtorf@b110-sc2hn01:/collab-ag-fischer/PROMISE/ctg_data /Users/rindtorf/promise/rsync_data/
#define datasets to load ctg data into R
assay <- "ctg_data/HC1092"
#path <- '/Volumes/collab-bernd-fischer/PROMISE/'
path <- '/Volumes/Macintosh HD/Users/rindtorf/promise/rsync_data/'
pattern <- '.TXT'
filelist <- list.files(paste0(path, assay, "/"), pattern = pattern)
dir <- paste0(path, assay, "/")

#define a function to load ctg files into R once they match the given definitions
load_delim <- function(path, name){
  read_delim(paste0(path, name), 
    "\t", escape_double = FALSE, col_names = FALSE, 
    trim_ws = TRUE)
}

#load ctg data into R
tmp_list <- lapply(filelist, load_delim, path = dir)
ctg <- do.call('rbind', tmp_list)
colnames(ctg) <- c('complete_barcode', 'Well_ID_384', 'photons')

#mutate ctg data to match the annotation file afterwards
ctg <- ctg %>% 
  mutate(complete_barcode.mut = complete_barcode) %>%
  separate(complete_barcode.mut, c("date", "user", "mithras", "plate_barcode")) %>%
  mutate(plate_barcode.mut = plate_barcode) %>%
  separate(plate_barcode.mut, c("line", "plate", "library"), sep = c(7,11))
    
#load tidy dataset with gel and organoid annotation
#load the correct but ill-formatted annotation file 
ccp_lib <- read_delim(paste0(path, "layouts/raw_layout_files/Clin_Cancer_Panel_V170511.csv"), 
    ";", escape_double = FALSE, trim_ws = TRUE) 

colnames(ccp_lib)[c(1, 3, 6)] <- c("Product Name", "concentration_factor", "Well_ID_384")
  
ccp_lib <- ccp_lib %>%  
  select(`Product Name`, Well_ID_384, concentration_factor)# %>%
 # mutate(concentration_factor = replace(concentration_factor, ccp_lib$`Product Name` == "DMSO" | ccp_lib$`Product Name` == "Staurosporine_500nM", NA))

#the list of used compounds was manually changed
drug_targets <- read_delim("~/promise/local_data/annotation/drug_targets.csv", 
    ";", escape_double = FALSE, trim_ws = TRUE) 


colnames(ccp_lib)[1] <- "drug"


#Merge the annotation data of the ccp panel
ctg_ccp <- merge(ctg, ccp_lib, by = 'Well_ID_384') %>%
  merge(., drug_targets, by.x = "drug", by.y = "input") %>%
  select(-drug) %>%
  mutate(drug = input_renamed) %>%
  select(-input_renamed) %>%
  mutate(control = if_else(is.na(concentration_factor), 1, 0),
         drug_conc = paste0(drug, concentration_factor)) %>%
         #replicate = if_else(date %in% c("170516", "170620", "170718", "170704" ), 2, 1) 
  mutate(Well_ID_384.mut = Well_ID_384) %>%
  separate(Well_ID_384.mut, c("letter", "number"), sep = 1) %>%
  mutate(number = as.numeric(number))

#Add a replicate count in a clumsy way
ctg_ccp <- ctg_ccp %>% 
  select(plate, line) %>% 
  group_by(line, plate) %>% 
  sample_n(1) %>%
  mutate(plate.no = substr(plate, 2,4) %>% as.numeric) %>%
  group_by(line, plate) %>%
  summarise(replicate = if_else(plate.no %in% c(6, 12, 4 ), 1,2)) %>% 
  merge(ctg_ccp, .,  by = c("line", "plate"))

#set wells with mistakes to NA and add a dummy variable for handling in HTSVis
ctg_ccp <- ctg_ccp %>%
  mutate(photons = ifelse(plate_barcode == 'D004T01P006L08' & letter %in% LETTERS[c(1, 3, 5, 7, 9, 11, 13, 15)] & number %in% c(1:13), NA, photons)) %>%
  mutate(dummy = 1)
 

#mutate(dat, dist = ifelse(speed == 4, dist * 100, dist)
tmp <- ctg_ccp #%>%
  #filter(date != "170606")

save(tmp, file = paste0("/Volumes/Macintosh HD/Users/rindtorf/promise/local_data/ctg_ccp_unfiltered_", substr(Sys.time(),1,10), ".Rdata"))
#write results file 

ctg_ccp <- ctg_ccp # %>%
  #filter(line != "D022T01")

1.1.4 Load the ctg data back into the session

load(file = paste0("/Volumes/Macintosh HD/Users/rindtorf/promise/local_data/ctg_ccp_unfiltered_", substr(Sys.time(),1,10), ".Rdata"))
ctg_ccp <- tmp

1.1.5 Load clinical data

The primary location of tumors is divided into three sights.

#import clinical data
cohort <- read_excel("~/promise/local_data/clinical_data/Clin_Data_Basic-cohort.xlsx") %>%
  separate(ID, c("p", "no.line"), sep = 1) %>%
  mutate(line = paste0("D", no.line, "T01")) %>% #only works if there were no re-biopsies
  select(-p, -no.line)

cohort_short <- cohort %>%
  select(line, Location) %>%
  #select(line, age, Location, `T`, N, M, Stage, G) %>%
  mutate(Patient = line) %>%
  select(-line)

cohort_short$Location[cohort_short$Location == "Asc"] <- "Right"
cohort_short$Location[cohort_short$Location == "Sigm"] <- "Left"

cohort_short <- cohort_short %>%
  mutate(Location = factor(Location, levels = c("Right", "Left", "Rectum")))
levels(cohort_short$Location)
## [1] "Right"  "Left"   "Rectum"

1.2 Gain an overview about clinical and treatment response features

1.2.1 Show replicate-wise correlation coefficients

# Get lower triangle of the correlation matrix
  get_lower_tri<-function(cormat){
    cormat[upper.tri(cormat)] <- NA
    return(cormat)
  }
  ctg_ccp %>%
  dplyr::group_by(plate_barcode) %>%
  mutate(median = median(photons, na.rm = TRUE)) %>%
  mutate(treat.median = photons/median) %>%
  #filter(date != "170606") %>%
  select(plate_barcode, Well_ID_384, treat.median) %>%
  spread(plate_barcode, treat.median) %>%
  remove_rownames() %>%
  column_to_rownames('Well_ID_384') %>%
  cor(use = "pairwise.complete.obs") %>%
  get_lower_tri() %>%  
  melt() %>%
  ggplot(aes(x= Var1, y = Var2, fill = value)) + 
  geom_tile(color = "white")+
 scale_fill_gradient2(low = "blue", high = "red", mid = "white", 
   midpoint = 0.5, limit = c(0,1), space = "Lab", 
   name="Pearson\nCorrelation") +
  theme_minimal()+ 
 theme(axis.text.x = element_text(angle = 45, vjust = 1, 
    size = 12, hjust = 1))+
 coord_fixed()
## Warning: Setting row names on a tibble is deprecated.

1.2.2 Normalize CTG photon counts by a plates DMSO controls

The 32 DMSO controls yield a reference median which is, across all plates, more robust and greater in relative size than a plate-wise median. Hence the DMSO control dependent median should be used for calibration of plates.

ctrl_tmp <- ctg_ccp %>%
  mutate(control = ifelse(drug == "DMSO", "DMSO", "Treatment")) %>%
  #filter(plate_barcode == "D020T01P004L08") %>%
  filter(control == "DMSO") %>%
  group_by(plate_barcode) %>%
  summarise(ctrl.median = median(photons, na.rm = TRUE),
            ctrl.mad = mad(photons, na.rm = TRUE)) %>%
  arrange(plate_barcode)
  
noctrl_tmp <- ctg_ccp %>%
  mutate(control = ifelse(drug == "DMSO", "DMSO", "Treatment")) %>%
  #filter(plate_barcode == "D020T01P004L08") %>%
  #filter(control == "DMSO") %>%
  group_by(plate_barcode) %>%
  summarise(median = median(photons, na.rm = TRUE),
            mad = mad(photons, na.rm = TRUE)) %>%
  arrange(plate_barcode)

#merge both tables to compare the different median references 
ctrls <- merge(ctrl_tmp, noctrl_tmp, by = "plate_barcode")

#illustrate to what extent the overall plate median is shifted towards more negative values relative to the DMSO based plate median
 ctrls %>%
  mutate(d.median = (median - ctrl.median)/ctrl.median*100,
         d.mad = (mad - ctrl.mad)/ctrl.mad*100,
         ctrl.r.cov = ctrl.mad/ctrl.median,
         r.cov = mad/median) %>%
  ggplot(aes(d.median)) + 
  geom_density(fill = "blue", alpha = 0.7) + 
  xlab("% difference of overall and DMSO plate median across screen") + 
  theme_minimal()

#add the ctrl median as a variable to the ctg_ccp df
ctg_ccp <- merge(ctg_ccp, ctrls, by=c("plate_barcode")) %>%
  #introduce relative photon counts
  mutate(treat.ctrl.median = photons/ctrl.median,
         treat.median = photons/median)

1.2.3 Draw a overall correlation plot of the two screen replicates

tmp <- ctg_ccp %>%
  unite("line_well", c("line", "Well_ID_384")) %>%
  select(line_well, replicate, treat.ctrl.median) %>%
  spread(replicate, treat.ctrl.median) 

tmp.cor<- tmp %>% 
  select(`1`,`2`) %>%
  cor(use = "pairwise.complete.obs") %>%
  .[1,2] %>%
  round(2)

tmp %>%
  ggplot(aes(x = `1`, y = `2`)) + 
  geom_point(alpha = 0.2) + 
  theme_minimal() + 
  ylab("Replicate 2") + 
  xlab("Replicate 1") + 
  ggtitle(paste0("Correlation of Normalized Treatment Reponse, r =", tmp.cor)) + 
  geom_abline(slope = 1) + 
  geom_density2d(alpha = 0.7)
## Warning: Removed 104 rows containing non-finite values (stat_density2d).
## Warning: Removed 104 rows containing missing values (geom_point).

  #stat_binhex() 

1.2.4 Response profiles with molecular subtype

Load CMS subtype data and plot a heatmap with response profiles. The CMS subtype confindence has to be greater than 50% to be shown. The first heatmap shows how some response profiles cluster better by the date of their measurment than their organoid line. The second cluster shows the response profiles in relationship to relevant clinical data.

# import expression data
library(readxl)
subtypes_organoids <- read_excel("~/promise/local_data/expression/subtypes_organoids_28062017.xlsx") %>%
  separate(X__1, c("p.line", "microarray"), sep = "_.", extra = "drop") %>%
  separate(p.line, c("p", "no.line"), sep = 1) %>%
  mutate(line = paste0("D", no.line, "01")) %>%
  select(-p, -no.line)

#The _short table contains only one variable describing the CMS. 
subtypes_short <- subtypes_organoids %>% 
  mutate(Patient = line,
        # CMS = RF.nearestCMS,
         CMS = RF.predictedCMS) %>%
  select(Patient, CMS)



# Test if posterior probs give more information about the tested lines

# Generate annotations for columns
col = data.frame(
    Patient = factor(rep(sort(unique(ctg_ccp$line)), each = 2))
)
col <- merge(col, subtypes_short, by = "Patient", all.x = TRUE)
col <- merge(col, cohort_short, by = "Patient", all.x = TRUE)

col <- col %>%
  mutate(Patient = substr(Patient, 1,5)) %>%
  select(Patient, Location, CMS)

col <- ctg_ccp %>% 
  select(plate_barcode, date, mithras, user) %>% 
  group_by(plate_barcode) %>% 
  sample_n(1) %>% 
  ungroup() %>% 
  select(-plate_barcode) %>% 
  cbind(col, .)
  

rownames(col) = unique(sort(ctg_ccp$plate_barcode))

col_temp <- col  %>%
  select(Patient, date)

#Generate annotation for rows
row <- ctg_ccp %>% 
  group_by(Well_ID_384) %>%
  sample_n(1) %>%
  #mutate(concentration_factor <- as.character(concen))
  select(drug, concentration_factor, Well_ID_384) %>%
  remove_rownames() %>%
  as.data.frame() %>%
  column_to_rownames("Well_ID_384")



ctg_ccp %>%
  group_by(plate_barcode) %>%
  mutate(median = median(photons, na.rm = TRUE)) %>%
  mutate(treat.median = photons/median) %>%
  #filter(date != "170606") %>%
  select(plate_barcode, Well_ID_384, treat.median) %>%
  spread(plate_barcode, treat.median) %>%
  remove_rownames() %>%
  column_to_rownames('Well_ID_384') %>%
  pheatmap( 
  scale = "column",
  cluster_rows = TRUE,
  show_rownames = FALSE,
  show_colnames = FALSE,
  color = colorRampPalette(c('#ca0020','#f4a582','#f7f7f7','#92c5de','#0571b0'))(50),
  #color = c('#ca0020','#f4a582','#f7f7f7','#92c5de','#0571b0'),
  annotation_col = col_temp#,
  #annotation_row = row
  )
## Warning: Setting row names on a tibble is deprecated.

col_temp <- col  %>%
  select(Patient, Location, CMS)

ctg_ccp %>%
  group_by(plate_barcode) %>%
  mutate(median = median(photons, na.rm = TRUE)) %>%
  mutate(treat.median = photons/median) %>%
  #filter(date != "170606") %>%
  select(plate_barcode, Well_ID_384, treat.median) %>%
  spread(plate_barcode, treat.median) %>%
  remove_rownames() %>%
  column_to_rownames('Well_ID_384') %>%
  pheatmap( 
  scale = "column",
  cluster_rows = TRUE,
  show_rownames = FALSE,
  show_colnames = FALSE,
  color = colorRampPalette(c('#ca0020','#f4a582','#f7f7f7','#92c5de','#0571b0'))(50),
  #color = c('#ca0020','#f4a582','#f7f7f7','#92c5de','#0571b0'),
  annotation_col = col_temp#,
  #annotation_row = row
  )
## Warning: Setting row names on a tibble is deprecated.

2 Investigate compound activity

2.1 Generate treatment response curves and illustrate compound activity

2.1.1 Generate panel of all screened compounds

Different response profiles are visible

dodge <- position_dodge(width=0.08)

ctg_ccp %>%
  mutate(line = as.factor(line)) %>%
  group_by(line, drug, concentration_factor) %>%
     dplyr::summarise(mean_photons = mean(treat.ctrl.median, na.rm = TRUE),
                     sd_photons = sd(treat.ctrl.median, na.rm = TRUE),
                     range_low_photons = range(treat.median)[1],
                     range_high_photons = range(treat.median)[2]) %>%
    ggplot(aes(y = mean_photons, x = concentration_factor)) + 
    geom_point(position = dodge,  stat = "identity", aes( colour = line))  + 
    geom_path(aes( colour = line), alpha = 1, position = dodge, size = 1) + 
    geom_errorbar(aes(ymin=mean_photons-sd_photons, ymax=mean_photons+sd_photons, group = line), width=0, position = dodge, size = 1, alpha = 0.5) +
    scale_x_log10(breaks = c(7,14)) + 
    ylab("photon count to plate median") + 
    facet_wrap(~drug) + 
    xlab("concentration factor 5-fold") + 
    scale_colour_brewer(palette = "Set3") + 
    theme_minimal()
## Warning: Removed 93 rows containing missing values (geom_errorbar).

2.2 Calculate AUC values to define compound activity

2.2.1 Calculate AUC for all compounds and lines

tmp <- ctg_ccp %>% 
  #filter(line == "D007T01") %>%
  group_by(plate_barcode, drug) %>%
  #select(plate_barcode, drug, concentration_factor, treat.ctrl.median) %>%
  summarise(auc=PharmacoGx::computeAUC(concentration_factor, treat.ctrl.median, trunc = TRUE, area.type = "Fitted", verbose = TRUE, viability_as_pct = FALSE))

auc_ccp <- tmp %>% 
  #filter(drug == 'Nutlin3a') %>%
  separate(plate_barcode, c("line", "plate", "library"), sep = c(7,11), remove = FALSE) 

#store the auc data for efficient knitting
save(auc_ccp, file = paste0("/Volumes/Macintosh HD/Users/rindtorf/promise/local_data/auc_ccp_unfiltered_", substr(Sys.time(),1,10), ".Rdata"))
#other means of AUC calculation can be considered as well

2.2.2 Load auc data for all compounds and lines

#the potent inhibitor Bortezomib leads to a viability of almost 0, scaling to a positive control is currently avoided
ctg_ccp%>%filter(drug == "Bortezomib" & concentration_factor >= 0.2) %>% select(treat.ctrl.median) %>% .$treat.ctrl.median %>% median(na.rm = TRUE)
## [1] 0.006639004
#The name of the auc file is fixed since recalculating it during every run is costly
load(file = paste0("/Volumes/Macintosh HD/Users/rindtorf/promise/local_data/auc_ccp_unfiltered_2017-08-21.Rdata"))

#I don't know why this is necessary
auc_ccp <- auc_cpp

2.2.3 Plot AUC for specific treatments

tmp <- auc_ccp %>%
  filter(drug == 'VX-702') %>%
  group_by(line, drug) %>%
  summarise(auc.mean = mean(auc, na.rm = TRUE),
            auc.min = min(auc),
            auc.max = max(auc))

tmp.thres <- auc_ccp %>%
  filter(drug == 'VX-702') %>%
  group_by(drug) %>%
  summarise(drug.thres = median(auc, na.rm = TRUE) + 2*mad(auc, na.rm = TRUE))# %>%
  #merge(tmp, ., by = "drug")
#tmp.mad <- mad(tmp$auc, na.rm = TRUE)
#tmp.median <- median(tmp$auc, na.rm = TRUE)



dot <-  tmp %>%
  ggplot(aes(y = line, x = auc.mean, color = line)) + 
  geom_point() + 
  theme_minimal() + 
  scale_colour_brewer(palette = "Set3") + 
  geom_vline(data = tmp.thres, aes(xintercept = drug.thres)) + 
  theme(axis.text.y=element_blank()) + 
  facet_wrap(~drug)

dot 

#density <- tmp %>%
#  ggplot(aes(auc)) + 
#  geom_density() + 
#  geom_vline(xintercept = tmp.median + 2*tmp.mad) + 
#  theme_minimal()


#grid.arrange(dot, density,   ncol=1, nrow =2,  heights=c(4, 1.4)) %>%
#  facet_wrap(~ line)

2.2.4 Calculate median centered log AUC for heatmap

auc_ccp_scaled <- auc_ccp %>%
  as_tibble() %>%
  group_by(drug) %>%
  summarise(drug.median = median(auc, na.rm = TRUE),
            drug.mad = mad(auc, na.rm = TRUE)) %>%
  merge(auc_ccp, ., by = "drug") %>% 
  mutate(rz.auc = (auc - drug.median)/drug.mad,
         dif.auc = (auc - drug.median),
         ratio.auc = (auc/drug.median),
         ratio.auc.log = FitAR::glog(ratio.auc)- FitAR::glog(1),
         auc.log = FitAR::glog(auc)) %>% 
  arrange(rz.auc)

2.2.5 Plot an AUC heatmap for all treatments and replicates

row <- ctg_ccp %>%
  group_by(drug) %>%
  summarise(Target = sample(target, 1),
            Pathway = sample(broad_summary, 1)) %>%
  as.data.frame() %>%
  remove_rownames() %>%
  column_to_rownames('drug') %>%
  dplyr::select(Pathway)
  
col <- auc_ccp %>%
  ungroup() %>%
  dplyr::select(plate_barcode, line) %>%
  distinct(plate_barcode, .keep_all = TRUE) %>%
  as.data.frame() %>%
  remove_rownames() %>%
  column_to_rownames('plate_barcode') 


  auc_ccp %>%
  ungroup() %>%
  dplyr::select(drug, plate_barcode, auc) %>%
  spread(plate_barcode, auc) %>%
  remove_rownames() %>%
  column_to_rownames('drug') %>%
  pheatmap( 
  #scale = "column",
  cluster_rows = TRUE,
  show_rownames = TRUE,
  show_colnames = TRUE,
  color = colorRampPalette(rev(c('#ca0020','#f4a582','#f7f7f7','#92c5de','#0571b0')))(15),
  #color = colorRampPalette(c('#f9f902','#000000','#3701f9'))(10),
  #color = c('#ca0020','#f4a582','#f7f7f7','#92c5de','#0571b0'),
  annotation_col = col,
  annotation_row = row,
  cutree_rows = 5
  ) 
## Warning: Setting row names on a tibble is deprecated.

3 Statistical testing of AUC

Todo: Associate AUC with organoid genotypes etc.

4 Case Study: Patient D027T01

4.1 Grey background dose-response

dodge <- position_dodge(width=0.15)
ctg_ccp %>%
  #filter(drug %in% c( "Methotrexate", "Birinapant",  "Ponatinib",  "Nutlin3a", 
  #                   "Panobinostat", "Erlotinib")) %>%
  filter(drug %in% c(  "5-FU", "Irinotecan / SN-38",
                    "Gefitinib", "Erlotinib")) %>%
  mutate(line = as.factor(line),
         drug = factor(drug, levels = c("Irinotecan / SN-38", "5-FU", 
                    "Gefitinib", "Erlotinib"))) %>%
  group_by(line, drug, concentration_factor) %>%
     dplyr::summarise(mean_photons = mean(treat.ctrl.median, na.rm = TRUE),
                     sd_photons = sd(treat.ctrl.median, na.rm = TRUE),
                     range_low_photons = range(treat.median)[1],
                     range_high_photons = range(treat.median)[2]) %>%
    ggplot(aes(y = mean_photons, x = concentration_factor)) + 
    geom_point(position = dodge,  stat = "identity", aes( colour = line), size = 2)  + 
    geom_path(aes( colour = line), alpha = 0.6, position = dodge, size = 2) + 
    #geom_errorbar(aes(ymin=mean_photons-sd_photons, ymax=mean_photons+sd_photons, group = line), width=0, position = dodge, size = 1, alpha = 0.5) +
    #geom_ribbon( aes(x=concentration_factor, y=mean_photons, ymin=mean_photons-sd_photons, ymax=mean_photons+sd_photons), alpha=0.2) +
    scale_x_log10(breaks = c(7,14)) + 
    ylab("Photon count normalized to control") + 
    facet_wrap(~drug) + 
    xlab("Concentration factor 5-fold") + 
    scale_color_manual(values=c(rep("#9E9A99", times = 10), "#F0390B", rep("#9E9A99", times = 1))) + 
    theme_minimal()

4.2 Anysis for personalized treatments

To compare a lines AUC with the median AUC of the same treatment a robust z score is calculated. To reduce the amount of false-positives a regression to the mean is guiding the analysis: The replicate with the smallest distance to the group median is defined to be relevant for further analysis. The stronger deviating replicate is discarded.

tmp <- auc_ccp %>%
  #filter(drug == 'DMSO') %>%
  group_by(line, drug) %>%
  summarise(auc.mean = mean(auc, na.rm = TRUE),
            auc.min = min(auc),
            auc.max = max(auc))

tmp.rz <- auc_ccp %>%
  as_tibble() %>%
  group_by(drug) %>%
  summarise(drug.thres = median(auc, na.rm = TRUE) + 2*mad(auc, na.rm = TRUE),
            drug.median = median(auc, na.rm = TRUE),
            drug.mad = mad(auc, na.rm = TRUE)) %>%
  merge(tmp, ., by = "drug") %>% 
  #filter(drug == "Etoposid") %>%
  mutate(rz.auc.mean = (auc.mean - drug.median)/drug.mad,
         rz.auc.min = (auc.min - drug.median)/drug.mad,
         rz.auc.max = (auc.max - drug.median)/drug.mad,
         dif.auc.mean = (auc.mean - drug.median),
         dif.auc.min = (auc.min - drug.median),
         dif.auc.max = (auc.max - drug.median)) %>% 
  arrange(rz.auc.max)

p.rz <- tmp.rz %>%
  filter(!grepl(.$drug, pattern = 'mit')) %>%
  #filter(line == 'D027T01') %>%
  #group_by(line) %>%
  mutate(rz.auc = if_else(rz.auc.min<rz.auc.mean, rz.auc.min, rz.auc.max),
         dif.auc = if_else(dif.auc.min<dif.auc.mean, dif.auc.min, dif.auc.max)) %>%
  filter(drug.mad != 0) %>%
  mutate(responder = if_else(rz.auc > 2, TRUE, FALSE)) 

comp.tmp <- p.rz %>%
  filter(rz.auc > 2, dif.auc > 0.3) %>%
  select(drug) %>% 
  .$drug %>%
  unique()

4.3 Selected compounds are illustrated

dodge <- position_dodge(width=0.15)
ctg_ccp %>%
  filter(drug %in% c( "Oxaliplatin mit 5-FU", comp.tmp)) %>%
  #filter(drug %in% c( "Irinotecan / SN-38", "5-FU", 
  #                  "Gefitinib", "Erlotinib")) %>%
  mutate(line = as.factor(line),
         drug = factor(drug, levels = c( "Oxaliplatin mit 5-FU", comp.tmp))) %>%
  group_by(line, drug, concentration_factor) %>%
     dplyr::summarise(mean_photons = mean(treat.ctrl.median, na.rm = TRUE),
                     sd_photons = sd(treat.ctrl.median, na.rm = TRUE),
                     range_low_photons = range(treat.median)[1],
                     range_high_photons = range(treat.median)[2]) %>%
    ggplot(aes(y = mean_photons, x = concentration_factor)) + 
    geom_point(position = dodge,  stat = "identity", aes( colour = line), size = 2)  + 
    geom_path(aes( colour = line), alpha = 0.6, position = dodge, size = 2) + 
    #geom_errorbar(aes(ymin=mean_photons-sd_photons, ymax=mean_photons+sd_photons, group = line), width=0, position = dodge, size = 1, alpha = 0.5) +
    #geom_ribbon( aes(x=concentration_factor, y=mean_photons, ymin=mean_photons-sd_photons, ymax=mean_photons+sd_photons), alpha=0.2) +
    scale_x_log10(breaks = c(7,14)) + 
    ylab("Photon count normalized to control") + 
    facet_wrap(~drug) + 
    xlab("Concentration factor 5-fold, mean of n=2") + 
    scale_colour_brewer(palette = "Set3") + 
    theme_minimal()

4.4 Draw AUC waterfall on patient level

tmp <- p.rz %>%
  filter(!grepl(.$drug, pattern = 'mit')) %>%
  filter(line == 'D027T01') %>%
  #group_by(line) %>%
  mutate(rz.auc = if_else(rz.auc.min<rz.auc.mean, rz.auc.min, rz.auc.max),
         dif.auc = if_else(dif.auc.min<dif.auc.mean, dif.auc.min, dif.auc.max)) %>%
  filter(drug.mad != 0) %>%
  mutate(responder = if_else(rz.auc > 2, TRUE, FALSE)) 

p.order <- arrange(tmp,desc( rz.auc))$drug
library(ggrepel)
set.seed(42)
#find a way to fix the dif.auc color range so one can see the effect in-vitro
tmp %>%
  arrange(rz.auc) %>%
  mutate(drug = factor(drug, levels = p.order)) %>%
  ggplot(aes(x = drug, y = rz.auc, fill = dif.auc)) + 
  geom_bar(stat = 'identity') + 
  theme_minimal() + 
  scale_fill_gradient(high="red", low="blue") + 
  geom_label_repel(
    data=subset(tmp, rz.auc > 1.5 | rz.auc < -1.5),
    aes(x = drug, y = rz.auc, label = drug),
    fontface = 'bold', color = 'white',
    box.padding = unit(0.35, "lines"),
    point.padding = unit(0.5, "lines"),
    segment.color = 'grey50'
  )

p.order <- arrange(tmp,desc( dif.auc))$drug
library(ggrepel)
set.seed(42)
#find a way to fix the dif.auc color range so one can see the effect in-vitro
tmp %>%
  arrange(rz.auc) %>%
  mutate(drug = factor(drug, levels = p.order)) %>%
  ggplot(aes(x = drug, y = dif.auc, fill = rz.auc)) + 
  geom_bar(stat = 'identity') + 
  theme_minimal() + 
  scale_fill_gradient(high="red", low="blue") + 
  geom_label_repel(
    data=subset(tmp, rz.auc > 1.5 | rz.auc < -1.5),
    aes(x = drug, y = dif.auc, label = drug),
    fontface = 'bold', color = 'white',
    box.padding = unit(0.35, "lines"),
    point.padding = unit(0.5, "lines"),
    segment.color = 'grey50'
  ) + 
  theme(axis.text.x=element_blank()) + 
  ylab("Effect size compared to median AUC") 

5 Work in Progress

drug_targets
## # A tibble: 70 x 7
##          input input_renamed target count  translation narrow_summary
##          <chr>         <chr>  <chr> <int>        <chr>          <chr>
##  1   Dasatinib     Dasatinib   ABL1    11         ABL1           ABL1
##  2   Ponatinib     Ponatinib   ABL1     5         ABL1           ABL1
##  3    AZD 5363      AZD 5363   AKT1     4         AKT1       PI3K/Akt
##  4  Crizotinib    Crizotinib    ALK    11          ALK            ALK
##  5   ALISERTIB     Alisertib  AURKA     8        AURKA          AURKA
##  6 Oxaliplatin   Oxaliplatin   BCL2     1   DNA damage     DNA damage
##  7  Paclitaxel    Paclitaxel   BCL2     4 Cytoskeleton   Cytoskeleton
##  8  Venetoclax    Venetoclax   BCL2     2         BCL2           BCL2
##  9  Birinapant    Birinapant  BIRC2     3 SMAC mimetic   proapoptotic
## 10  Dabrafenib    Dabrafenib   BRAF    11         BRAF           BRAF
## # ... with 60 more rows, and 1 more variables: broad_summary <chr>
select_anno <- drug_targets %>% 
  select(broad_summary) %>%
  group_by(broad_summary) %>%
  summarise(count = n()) %>%
  filter(count > 1)

row <- drug_targets %>% 
  filter(broad_summary %in% select_anno$broad_summary) %>%
  as.data.frame() %>%
  column_to_rownames("input") %>%
  select(broad_summary) %>%
  mutate(broad_summary = factor(broad_summary))

colnames(row) <- c("Target")




# Specify colors
ann_colors <- list(
    Patient = c(RColorBrewer::brewer.pal(12, "Set3")),
    Location = RColorBrewer::brewer.pal(3, "Set1"),
    CMS = RColorBrewer::brewer.pal(3, "Set2")
    #Target = c(RColorBrewer::brewer.pal(12, "Paired"), '#c2c2b2',  '#ff666a',  '#ff0f15'))
)
#names(ann_colors$Target) <- unique(row$Target)
names(ann_colors$Patient) <- unique(col_temp$Patient)
names(ann_colors$Location) <- c("Rectum", "Left", "Right")
names(ann_colors$CMS) <- c("CMS1", "CMS2", "CMS3")


auc_ccp_scaled %>%
  group_by(line, drug) %>%
  summarise(auc.log = mean(auc.log, na.rm = TRUE)) %>%
  select(line, drug, auc.log) %>%
  filter(!drug %in% c("Alpelisib", "DMSO", "Vismodegib")) %>%
  #filter(!grepl(.$drug, pattern = 'mit')) %>%
  spread(line, auc.log) %>%
  remove_rownames() %>%
  column_to_rownames('drug') %>%
  pheatmap( 
  scale = "row",
  cluster_rows = TRUE,
  show_rownames = TRUE,
  show_colnames = FALSE,
  color = colorRampPalette(rev(c('#ca0020','#f4a582','#f7f7f7','#92c5de','#0571b0')))(15),
  #color = colorRampPalette(c('#f9f902','#000000','#3701f9'))(10),
  #color = c('#ca0020','#f4a582','#f7f7f7','#92c5de','#0571b0'),
  #annotation_col = col_temp,
  #annotation_row = row,
  cutree_rows = 5
  ) 
## Warning: Setting row names on a tibble is deprecated.

auc_ccp_scaled %>%
  group_by(plate_barcode) %>%
  #mutate(median = median(photons, na.rm = TRUE)) %>%
  select(plate_barcode, drug, auc.log) %>%
  filter(!drug %in% c("Alpelisib", "DMSO", "Vismodegib")) %>%
  #filter(!grepl(.$drug, pattern = 'mit')) %>%
  spread(plate_barcode, auc.log) %>%
  remove_rownames() %>%
  column_to_rownames('drug') %>%
  pheatmap( 
  scale = "row",
  cluster_rows = TRUE,
  show_rownames = TRUE,
  show_colnames = FALSE,
  color = colorRampPalette(rev(c('#ca0020','#f4a582','#f7f7f7','#92c5de','#0571b0')))(15),
  #color = colorRampPalette(c('#f9f902','#000000','#3701f9'))(10),
  #color = c('#ca0020','#f4a582','#f7f7f7','#92c5de','#0571b0'),
  annotation_col = col_temp,
  #annotation_row = row,
  cutree_rows = 5,
  annotation_colors = ann_colors
  ) 
## Warning: Setting row names on a tibble is deprecated.

auc_ccp %>% ungroup %>% select(drug) %>%
merge(drug_targets, ., by.x = "input", by.y = "drug", all = TRUE) %>% distinct(input, .keep_all = TRUE) %>%
write.csv(., "~/promise/local_data/annotation/drug_targets_original.csv")

5.1 ongoing

df mit drug, line, auc for each drug test cellline of interest auc ~ drug + cellline.of.interest

Run function to return shortcuts of organoid lines

show_shortcuts <- function(pattern, data, rows = 4){


treatment_of_interest <- as.character(data$drug)[grep(pattern, data$drug)] %>% unique()

defined_m_barcodes <- data %>%
    filter(drug == treatment_of_interest) %>%
    filter(date != "170606") %>% #currently only this makes sense
    separate(plate, c("P", "plate.number") ,1) %>%
    mutate(plate.number_m1 = as.numeric(plate.number)-1) %>%
    mutate(plate.number_m1 =str_pad(plate.number_m1, 3, pad = 0)) %>%
    mutate(plate_barcode_m1 = paste0(line, P, plate.number_m1, library)) %>%
    select(drug, plate_barcode_m1, Well_ID_384, line, concentration_factor) %>%
    arrange(line, plate_barcode_m1, concentration_factor) %>%
    separate(Well_ID_384, c("letter", "number"), 1) %>%
  select(plate_barcode_m1, line) %>%
  group_by(line) %>%
  sample_n(1) %>%
  .$plate_barcode_m1

img.path <- data %>%
    filter(drug == treatment_of_interest) %>%
    filter(date != "170606") %>% #currently only this makes sense
    separate(plate, c("P", "plate.number") ,1) %>%
    mutate(plate.number_m1 = as.numeric(plate.number)-1) %>%
    mutate(plate.number_m1 =str_pad(plate.number_m1, 3, pad = 0)) %>%
    mutate(plate_barcode_m1 = paste0(line, P, plate.number_m1, library)) %>%
    select(drug, plate_barcode_m1, Well_ID_384, line, concentration_factor) %>%
    arrange(line, plate_barcode_m1, concentration_factor) %>%
    separate(Well_ID_384, c("letter", "number"), 1) %>%
    filter(plate_barcode_m1 %in% defined_m_barcodes) %>%
    mutate(path = paste0("~/promise/html/",
                         plate_barcode_m1, 
                         "/",
                         plate_barcode_m1,
                         "_",
                         letter,
                         "_",
                         number,
                         "_1.jpeg"))

if(nrow(img.path) > 41){img.path <- img.path %>% group_by(plate_barcode_m1) %>% sample_n(1) %>%
  arrange(line, plate_barcode_m1)}
   
  img <- readImage(img.path$path,
                   all = TRUE,
                   names = paste0(img.path$drug, "_", img.path$line))
    
  img.rows = rows*-1
  
  display(img[20:209, 20:209,,], method = "raster", all = TRUE, nx = rows, margin = 20, bg = "black")
  
  return(img.path %>%
           select(drug, line))
}

Print separate response curves for selected compounds

for(i in c("Ponatinib", "Afatinib", "AZD 4547")){
dodge <- position_dodge(width=0.15)
p <- ctg_ccp %>%
  group_by(complete_barcode) %>%
  mutate(ctrl.median = ifelse(drug == "DMSO", median(photons, na.rm = TRUE), NA),
         median = median(photons, na.rm = TRUE),
         mad = mad(photons, na.rm = TRUE)) %>%
  mutate(treat.ctrl.median = photons/ctrl.median,
         treat.median = photons/median,
         treat.rob.z = (photons-median)/mad) %>%

  #filter(drug == "Erlotinib") %>%
  #filter(date != "170606") %>%
  filter(drug %in% c(i)) %>%
  mutate(line = as.factor(line)) %>%
  group_by(line, drug, concentration_factor) %>%
     dplyr::summarise(mean_photons = mean(treat.median, na.rm = TRUE),
                     sd_photons = sd(treat.median, na.rm = TRUE),
                     range_low_photons = range(treat.median)[1],
                     range_high_photons = range(treat.median)[2]) %>%
    ggplot(aes(y = mean_photons, x = concentration_factor)) + 
    geom_point(position = dodge,  stat = "identity", aes( colour = line), size = 4)  + 
    geom_path(aes( colour = line), alpha = 0.6, position = dodge, size = 4) + 
    geom_errorbar(aes(ymin=mean_photons-sd_photons, ymax=mean_photons+sd_photons, group = line), width=0, position = dodge, size = 1, alpha = 0.5) +
    scale_x_log10(breaks = c(7,14)) + 
    ylab("photon count normalized to plate median") + 
    facet_wrap(~drug) + 
    xlab("concentration factor 5-fold") + 
    scale_colour_brewer(palette = "Set1") + 
    theme_minimal()

print(p)
}
## Warning in mutate_impl(.data, dots): Unequal factor levels: coercing to
## character
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in RColorBrewer::brewer.pal(n, pal): n too large, allowed maximum for palette Set1 is 9
## Returning the palette you asked for with that many colors
## Warning: Removed 15 rows containing missing values (geom_point).
## Warning: Removed 3 rows containing missing values (geom_errorbar).
## Warning in mutate_impl(.data, dots): Unequal factor levels: coercing to
## character
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in RColorBrewer::brewer.pal(n, pal): n too large, allowed maximum for palette Set1 is 9
## Returning the palette you asked for with that many colors
## Warning: Removed 15 rows containing missing values (geom_point).
## Warning: Removed 2 rows containing missing values (geom_errorbar).
## Warning in mutate_impl(.data, dots): Unequal factor levels: coercing to
## character
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in RColorBrewer::brewer.pal(n, pal): n too large, allowed maximum for palette Set1 is 9
## Returning the palette you asked for with that many colors
## Warning: Removed 15 rows containing missing values (geom_point).
## Warning: Removed 1 rows containing missing values (geom_errorbar).

Print shortcuts for Methotrexate treated organoids

show_shortcuts("Metho", ctg_ccp, rows = 5)
## Warning in file(url, "rb"): cannot open file '/Users/rindtorf/promise/html/
## D013T01P007L08/D013T01P007L08_M_01_1.jpeg': No such file or directory
## Warning in FUN(X[[i]], ...): cannot open the connection
## Warning in file(url, "rb"): cannot open file '/Users/rindtorf/promise/html/
## D015T01P007L08/D015T01P007L08_P_16_1.jpeg': No such file or directory
## Warning in FUN(X[[i]], ...): cannot open the connection
## Warning in file(url, "rb"): cannot open file '/Users/rindtorf/promise/html/
## D018T01P007L08/D018T01P007L08_C_18_1.jpeg': No such file or directory
## Warning in FUN(X[[i]], ...): cannot open the connection
## Warning in file(url, "rb"): cannot open file '/Users/rindtorf/promise/html/
## D020T01P007L08/D020T01P007L08_M_01_1.jpeg': No such file or directory
## Warning in FUN(X[[i]], ...): cannot open the connection
## Warning in file(url, "rb"): cannot open file '/Users/rindtorf/promise/html/
## D021T01P003L08/D021T01P003L08_O_16_1.jpeg': No such file or directory
## Warning in FUN(X[[i]], ...): cannot open the connection
## Warning in file(url, "rb"): cannot open file '/Users/rindtorf/promise/html/
## D022T01P007L08/D022T01P007L08_M_01_1.jpeg': No such file or directory
## Warning in FUN(X[[i]], ...): cannot open the connection
## Warning in file(url, "rb"): cannot open file '/Users/rindtorf/promise/html/
## D027T01P007L08/D027T01P007L08_C_18_1.jpeg': No such file or directory
## Warning in FUN(X[[i]], ...): cannot open the connection
## Warning in file(url, "rb"): cannot open file '/Users/rindtorf/promise/html/
## D030T01P007L08/D030T01P007L08_P_16_1.jpeg': No such file or directory
## Warning in FUN(X[[i]], ...): cannot open the connection
## Adding missing grouping variables: `plate_barcode_m1`

## # A tibble: 12 x 3
## # Groups:   plate_barcode_m1 [12]
##    plate_barcode_m1         drug    line
##               <chr>        <chr>   <chr>
##  1   D004T01P009L08 Methotrexate D004T01
##  2   D007T01P009L08 Methotrexate D007T01
##  3   D010T01P011L08 Methotrexate D010T01
##  4   D013T01P007L08 Methotrexate D013T01
##  5   D015T01P007L08 Methotrexate D015T01
##  6   D018T01P007L08 Methotrexate D018T01
##  7   D019T01P003L08 Methotrexate D019T01
##  8   D020T01P007L08 Methotrexate D020T01
##  9   D021T01P003L08 Methotrexate D021T01
## 10   D022T01P007L08 Methotrexate D022T01
## 11   D027T01P007L08 Methotrexate D027T01
## 12   D030T01P007L08 Methotrexate D030T01

6 Correction of CTG data by proliferation rate

Load CTG Proliferation data into R and perform first annotation of the dataset

#define datasets to load ctg data into R
assay <- "ctg_data/proliferation/proliferation_true"
#path <- '/Volumes/collab-bernd-fischer/PROMISE/'
path <- '/Volumes/Macintosh HD/Users/rindtorf/promise/local_data/'
pattern <- '_Prolif_'
filelist <- list.files(paste0(path, assay, "/"), pattern = pattern, recursive = TRUE, full.names = TRUE) #use this for importing the data
#dir <- paste0(path, assay, "/")

#define a function to load proliferation ctg files into R once they match the given definitions
load_delim <- function(full.name){
  plate_name <- full.name %>% 
    as.tibble() %>% 
    separate(value, c(letters[1:13]), sep = "/") %>% 
    select(m) %>% 
    separate(m, c(letters[1:2]), sep = -5) %>% 
    .$a
  read_delim(full.name, 
    "\t", escape_double = FALSE, col_names = FALSE, 
    trim_ws = TRUE) %>% 
    mutate(plate_name = plate_name)
}

#load ctg data into R
tmp_list <- lapply(filelist, load_delim)
## Parsed with column specification:
## cols(
##   X1 = col_character(),
##   X2 = col_character(),
##   X3 = col_integer()
## )
## Parsed with column specification:
## cols(
##   X1 = col_character(),
##   X2 = col_character(),
##   X3 = col_integer()
## )
## Parsed with column specification:
## cols(
##   X1 = col_character(),
##   X2 = col_character(),
##   X3 = col_integer()
## )
## Parsed with column specification:
## cols(
##   X1 = col_character(),
##   X2 = col_character(),
##   X3 = col_integer()
## )
## Parsed with column specification:
## cols(
##   X1 = col_character(),
##   X2 = col_character(),
##   X3 = col_integer()
## )
## Parsed with column specification:
## cols(
##   X1 = col_character(),
##   X2 = col_character(),
##   X3 = col_integer()
## )
## Parsed with column specification:
## cols(
##   X1 = col_character(),
##   X2 = col_character(),
##   X3 = col_integer()
## )
## Parsed with column specification:
## cols(
##   X1 = col_character(),
##   X2 = col_character(),
##   X3 = col_integer()
## )
## Parsed with column specification:
## cols(
##   X1 = col_character(),
##   X2 = col_character(),
##   X3 = col_integer()
## )
## Parsed with column specification:
## cols(
##   X1 = col_character(),
##   X2 = col_character(),
##   X3 = col_integer()
## )
## Parsed with column specification:
## cols(
##   X1 = col_character(),
##   X2 = col_character(),
##   X3 = col_integer()
## )
## Parsed with column specification:
## cols(
##   X1 = col_character(),
##   X2 = col_character(),
##   X3 = col_integer()
## )
## Parsed with column specification:
## cols(
##   X1 = col_character(),
##   X2 = col_character(),
##   X3 = col_integer()
## )
## Parsed with column specification:
## cols(
##   X1 = col_character(),
##   X2 = col_character(),
##   X3 = col_integer()
## )
## Parsed with column specification:
## cols(
##   X1 = col_character(),
##   X2 = col_character(),
##   X3 = col_integer()
## )
## Parsed with column specification:
## cols(
##   X1 = col_character(),
##   X2 = col_character(),
##   X3 = col_integer()
## )
## Parsed with column specification:
## cols(
##   X1 = col_character(),
##   X2 = col_character(),
##   X3 = col_integer()
## )
## Parsed with column specification:
## cols(
##   X1 = col_character(),
##   X2 = col_character(),
##   X3 = col_integer()
## )
## Parsed with column specification:
## cols(
##   X1 = col_character(),
##   X2 = col_character(),
##   X3 = col_integer()
## )
## Parsed with column specification:
## cols(
##   X1 = col_character(),
##   X2 = col_character(),
##   X3 = col_integer()
## )
## Parsed with column specification:
## cols(
##   X1 = col_character(),
##   X2 = col_character(),
##   X3 = col_integer()
## )
## Parsed with column specification:
## cols(
##   X1 = col_character(),
##   X2 = col_character(),
##   X3 = col_integer()
## )
## Parsed with column specification:
## cols(
##   X1 = col_character(),
##   X2 = col_character(),
##   X3 = col_integer()
## )
## Parsed with column specification:
## cols(
##   X1 = col_character(),
##   X2 = col_character(),
##   X3 = col_integer()
## )
## Parsed with column specification:
## cols(
##   X1 = col_character(),
##   X2 = col_character(),
##   X3 = col_integer()
## )
## Parsed with column specification:
## cols(
##   X1 = col_character(),
##   X2 = col_character(),
##   X3 = col_integer()
## )
ctg_prolif <- do.call('rbind', tmp_list)
colnames(ctg_prolif) <- c('original_name', 'well_id_384', 'photons', "plate_barcode")

#mutate ctg data to harness annotation
 ctg_prolif <- ctg_prolif %>% 
  separate(plate_barcode, c("tmp.plate_barcode", "tmp.mu"), sep = -7, remove = FALSE) %>%
  mutate(tmp.mu = substr(tmp.mu, 2,6)) %>%
  separate(tmp.mu, c("mithras", "user"), sep = "_") %>%
  separate(tmp.plate_barcode, c("date", "experiment", "timepoint", "lines" ), sep= "_", extra = "merge") %>%
  mutate(no_lines = str_count(lines, "D0")) %>%
  separate(well_id_384, c("letter", "number"), sep = 1, remove = FALSE)

Filter the proliferation dataset further by removing empty wells

#data from seeding timepoints was generated in 96 well plates. For now it will be removed from the dataset to enable standardized analysis
tmp <- ctg_prolif %>% 
  filter(! timepoint %in% c("seeding"))

#on two dates (170613 and 170724) not the complete plate was filled with organoids. Here the unseeded wells are removed from the dataset. 
tmp <- tmp %>% 
  filter(!(date %in% c("170613") & number %in% 13:24))%>%
  filter(!(date %in% c("170724") & number %in% 21:24))

#the data has the following distribution 
tmp %>%
  ggplot(aes(photons)) + 
  geom_density(adjust = 0.3) + 
  scale_x_continuous(limits = c(0,50000)) + 
  theme_minimal() + 
  ggtitle("Distribution of Photon counts for all Proliferation Experiments")

#one single well in the dataset has a photon count of exactly 0 (second lowest value being 658). It is set to NA
tmp <- tmp %>% 
  mutate(photons = replace(photons, which(photons == 0), NA))
#During two seeding-dates mistakes were made. These seeding-dates are excluded later during the analysis
  #filter(!(date %in% c("170606", "170515")))

#summary stats for the dataset
summary(tmp$photons)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##     658    6170   10100   10802   14618   42090       1
#write tmp to load it into HTSVis 
tmp %>% 
  filter(timepoint != "seeding") %>%
save(., file="tmp.Rdata")

#redefine ctg_prolig
ctg_prolif <- tmp

Complete the annotation of the dataset by linking organoid lines to photon counts and time

#get an overview of the lines and timepoints available. On each seeding-date there was only one set of lines processed and the plate-layout did not change between measurments.
ctg_prolif %>% 
  group_by(date, timepoint) %>%
  summarise(n = n(),
            lines = sample(lines, 1))
## # A tibble: 20 x 4
## # Groups:   date [?]
##      date timepoint     n                                           lines
##     <chr>     <chr> <int>                                           <chr>
##  1 170515        d2   384                 D004T01_D010T01_D007T01_D019T01
##  2 170515        d4   384                 D004T01_D010T01_D007T01_D019T01
##  3 170529        d2   384 D004T01_D007T01_D010T01_D019T01_D022T01_D030T01
##  4 170529        d4   384 D004T01_D007T01_D010T01_D019T01_D022T01_D030T01
##  5 170529        d8   384 D004T01_D007T01_D010T01_D019T01_D022T01_D030T01
##  6 170606        d2   384 D004T01_D007T01_D010T01_D019T01_D022T01_D030T01
##  7 170606        d4   384 D004T01_D007T01_D010T01_D019T01_D022T01_D030T01
##  8 170606        d8   384 D004T01_D007T01_D010T01_D019T01_D022T01_D030T01
##  9 170613        d2   192                         D020T01_D027T01_D030T01
## 10 170613        d4   192                         D020T01_D027T01_D030T01
## 11 170613        d8   192                         D020T01_D027T01_D030T01
## 12 170711        d2   384 D013T01_D015T01_D018T01_D020T01_D021T01_D027T01
## 13 170711        d4   384 D013T01_D015T01_D018T01_D020T01_D021T01_D027T01
## 14 170711        d8   384 D013T01_D015T01_D018T01_D020T01_D021T01_D027T01
## 15 170717        d2   384                 D013T01_D015T01_D018T01_D021T01
## 16 170717        d4   384                 D013T01_D015T01_D018T01_D021T01
## 17 170717        d8   384                 D013T01_D015T01_D018T01_D021T01
## 18 170724        d2   320         D004T01_D007T01_D010T01_D019T01_D022T01
## 19 170724        d4   320         D004T01_D007T01_D010T01_D019T01_D022T01
## 20 170724        d8   320         D004T01_D007T01_D010T01_D019T01_D022T01
#define a function to annotate a dataset for a defined seeding-date
annotate_prolif <- function(df){
  tmp.lines <- df %>% 
    select(lines) %>%
    .$lines %>% #turn output into a string
    unique() %>%
    str_split(., "_") %>%
    unlist()
  
  
  tmp.anno <- tibble(number = df %>% .$number %>%  unique(),
                     ratio = length(number)/length(tmp.lines),
                     anno_lines = rep(tmp.lines, each = ratio),
                     no_lines = df %>% .$no_lines %>% unique(),
                      #for control reasons the product of the columsn per line and the number of lines is calculated
                     anno_number = no_lines*ratio) 
return(tmp.anno)
}

#redefine and annotate the ctg_prolif dataset
ctg_prolif <- ctg_prolif %>%
  group_by(date) %>%
  do(annotate_prolif(.)) %>%
  merge(ctg_prolif, ., by = c("date", "number", "no_lines"))

ctg_prolif %>%
  group_by(date, timepoint, anno_lines) %>% 
  summarise(n = n())
## # A tibble: 98 x 4
## # Groups:   date, timepoint [?]
##      date timepoint anno_lines     n
##     <chr>     <chr>      <chr> <int>
##  1 170515        d2    D004T01    96
##  2 170515        d2    D007T01    96
##  3 170515        d2    D010T01    96
##  4 170515        d2    D019T01    96
##  5 170515        d4    D004T01    96
##  6 170515        d4    D007T01    96
##  7 170515        d4    D010T01    96
##  8 170515        d4    D019T01    96
##  9 170529        d2    D004T01    64
## 10 170529        d2    D007T01    64
## # ... with 88 more rows

Gain a first overview about the proliferation dataset. Proliferation data for lines D015T01 and D030T01 appear discordant. Proliferation data for D019T01, D021T01 and D022T01 appear to follow a linear growth pattern

tmp <- ctg_prolif %>%
  filter(!(number %in% c(1,24) | letter %in% c("A", "P"))) 
  #wells loacted on the edge of the plates are removed. However, this filtration step has no impact on the median photon count per plate.  

tmp %>% 
  group_by(date, timepoint, anno_lines) %>%
  summarise(median = median(photons, na.rm = TRUE),
            mad = mad(photons, na.rm = TRUE)) %>%  
  ggplot(aes(timepoint, median, color = date)) + 
  geom_point() + 
  geom_path(aes(group = date)) + 
  facet_wrap(~anno_lines) + 
  theme_minimal()

tmp <- tmp %>% 
  mutate(timepoint_num = substr(timepoint,2,2) %>% as.numeric(),
         photons_log2 = log2(photons)) %>%
  filter(!(date %in% c("170515", "170606"))) #seeding-dates that contained a protocol-deviation are removed from the dataset

tmp %>% 
  group_by(date, timepoint_num, anno_lines) %>%
  summarise(median = median(photons, na.rm = TRUE),
            mad = mad(photons, na.rm = TRUE)) %>%  
  ggplot(aes(timepoint_num, median, color = date)) + 
  geom_point() + 
  geom_path(aes(group = date)) + 
  facet_wrap(~anno_lines) + 
  theme_minimal() + 
  ggtitle("Proliferation curves for 12 organoid lines") + 
  ylab("Median photon count for each timepoint") + 
  xlab("Time (d)")

tmp %>% 
  group_by(date, timepoint_num, anno_lines) %>%
  summarise(median = median(photons_log2, na.rm = TRUE),
            mad = mad(photons_log2, na.rm = TRUE)) %>%  
  ggplot(aes(timepoint_num, median, color = date)) + 
  geom_point() + 
  geom_path(aes(group = date)) + 
  facet_wrap(~anno_lines) + 
  theme_minimal() + 
  scale_x_continuous(limits = c(2,4), breaks = c(2,4)) + 
  ggtitle("Proliferation curves for 12 organoid lines over 4 days") + 
  ylab("Log2 of median photon count for each timepoint") + 
  xlab("Time (d)")
## Warning: Removed 24 rows containing missing values (geom_point).
## Warning: Removed 5 rows containing missing values (geom_path).

ctg_prolif <- tmp

needs code review!

#needs code review!
test <- ctg_prolif %>% 
  select(timepoint_num, date, anno_lines, photons_log2) %>%
  filter(timepoint_num %in% c(2,4)) %>%
  group_by(anno_lines) %>%
  do(fit_growth(df = ., time = timepoint_num, data = photons_log2, model = "linear") %>% broom::tidy()) 

ctg_prolif %>% 
  select(timepoint_num, date, anno_lines, photons_log2) %>%
  group_by(date, timepoint_num, anno_lines) %>%
  ggplot(aes(timepoint_num, photons_log2)) + 
  geom_point() + 
  stat_growthcurve(model = "linear") + 
  facet_wrap(~anno_lines) + 
  theme_minimal() + 
  scale_x_continuous(limits = c(2,4), breaks = c(2,4)) + 
  ggtitle("Proliferation curves for 12 organoid lines over 4 days") + 
  ylab("Log2 of median photon count for each timepoint") + 
  xlab("Time (d)")
## Warning: Removed 1414 rows containing non-finite values (stat_growth_curve).
## Warning: Removed 1414 rows containing missing values (geom_point).

tmp <- test %>%
  group_by(anno_lines) %>%
  summarise(d3 = estimate[1]+estimate[2]*3) %>%
  mutate(d3_lin = 2^d3,
         line = anno_lines) %>%
  select(-anno_lines) %>%
  merge(ctg_ccp, ., by = "line")

head(tmp)
##      line  plate_barcode plate Well_ID_384            complete_barcode
## 1 D004T01 D004T01P006L08  P006         J23 170502_NR_M2_D004T01P006L08
## 2 D004T01 D004T01P006L08  P006         D01 170502_NR_M2_D004T01P006L08
## 3 D004T01 D004T01P006L08  P006         A02 170502_NR_M2_D004T01P006L08
## 4 D004T01 D004T01P006L08  P006         P06 170502_NR_M2_D004T01P006L08
## 5 D004T01 D004T01P006L08  P006         P17 170502_NR_M2_D004T01P006L08
## 6 D004T01 D004T01P006L08  P006         K22 170502_NR_M2_D004T01P006L08
##   photons   date user mithras library concentration_factor target count
## 1    5494 170502   NR      M2     L08               1.0000   AKT1     4
## 2   10486 170502   NR      M2     L08               0.0016   ABL1    11
## 3      NA 170502   NR      M2     L08               0.0016   PTK2     1
## 4   12592 170502   NR      M2     L08               0.0016   <NA>    NA
## 5   11874 170502   NR      M2     L08               1.0000   MGAM     5
## 6    4788 170502   NR      M2     L08               0.2000   PLK1     3
##         translation narrow_summary broad_summary                   drug
## 1              AKT1       PI3K/Akt      PI3K/Akt               AZD 5363
## 2              ABL1           ABL1          ABL1              Dasatinib
## 3               FAK            FAK           FAK             Defactinib
## 4        DNA damage     DNA damage    DNA damage Trifluoridin/Tipiracil
## 5 alpha-Glucosidase   antidiabetic    Metabolism               Miglitol
## 6              PLK1           PLK1    Cell cycle             Volasertib
##   control                    drug_conc letter number replicate dummy
## 1       0                    AZD 53631      J     23         1     1
## 2       0              Dasatinib0.0016      D      1         1     1
## 3       0             Defactinib0.0016      A      2         1     1
## 4       0 Trifluoridin/Tipiracil0.0016      P      6         1     1
## 5       0                    Miglitol1      P     17         1     1
## 6       0                Volasertib0.2      K     22         1     1
##   ctrl.median ctrl.mad median      mad treat.ctrl.median treat.median
## 1        9325 1012.616   8738 1650.134         0.5891689    0.6287480
## 2        9325 1012.616   8738 1650.134         1.1245040    1.2000458
## 3        9325 1012.616   8738 1650.134                NA           NA
## 4        9325 1012.616   8738 1650.134         1.3503485    1.4410620
## 5        9325 1012.616   8738 1650.134         1.2733512    1.3588922
## 6        9325 1012.616   8738 1650.134         0.5134584    0.5479515
##         d3   d3_lin
## 1 12.82019 7232.079
## 2 12.82019 7232.079
## 3 12.82019 7232.079
## 4 12.82019 7232.079
## 5 12.82019 7232.079
## 6 12.82019 7232.079
test <- ctg_ccp %>%
  filter(drug == "DMSO") %>%
  group_by(line, plate) %>%
  summarise(median = median(photons, na.rm = TRUE))
#perhaps the estimation of doubling time is improved if positional effects are respected and every well is handled as a separate experiment
ctg_prolif %>%
  filter(anno_lines == "D030T01" ) %>%
  filter(letter %in% c("B", "C")) %>%
  ggplot(aes(timepoint, photons, color = well_id_384, group = well_id_384)) + 
  geom_point() + 
  geom_line(aes(group = well_id_384)) + 
  #facet_wrap(~anno_lines) + 
  theme_minimal()